In [1]:
# Imports 
import pandas as pd
import numpy as np
import tensorflow as tf
from nltk.corpus import stopwords
import re
from collections import Counter
import operator
from tensorflow.python.layers.core import Dense

In [2]:
def read_reviews():
    reviews = pd.read_csv("./Datasets/Reviews/Reviews.csv")
    reviews = reviews.dropna()
    reviews = reviews.drop(["Id","ProductId","UserId","ProfileName","HelpfulnessNumerator","HelpfulnessDenominator","Score","Time"]
                 ,axis=1)
    return reviews

reviews = read_reviews()
reviews.head()


Out[2]:
Summary Text
0 Good Quality Dog Food I have bought several of the Vitality canned d...
1 Not as Advertised Product arrived labeled as Jumbo Salted Peanut...
2 "Delight" says it all This is a confection that has been around a fe...
3 Cough Medicine If you are looking for the secret ingredient i...
4 Great taffy Great taffy at a great price. There was a wid...

In [3]:
reviews[reviews.isnull().any(axis=1)] # Confirm no rows contain null values (the result should be empty)


Out[3]:
Summary Text

In [4]:
# Cleaning and normalizing the text and summaries
# Contraction-to-expansion mapping
contractions = { 
"ain't": "am not",
"aren't": "are not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
"couldn't": "could not",
"couldn't've": "could not have",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"hadn't": "had not",
"hadn't've": "had not have",
"hasn't": "has not",
"haven't": "have not",
"he'd": "he would",
"he'd've": "he would have",
"he'll": "he will",
"he's": "he is",
"how'd": "how did",
"how'll": "how will",
"how's": "how is",
"i'd": "i would",
"i'll": "i will",
"i'm": "i am",
"i've": "i have",
"isn't": "is not",
"it'd": "it would",
"it'll": "it will",
"it's": "it is",
"let's": "let us",
"ma'am": "madam",
"mayn't": "may not",
"might've": "might have",
"mightn't": "might not",
"must've": "must have",
"mustn't": "must not",
"needn't": "need not",
"oughtn't": "ought not",
"shan't": "shall not",
"sha'n't": "shall not",
"she'd": "she would",
"she'll": "she will",
"she's": "she is",
"should've": "should have",
"shouldn't": "should not",
"that'd": "that would",
"that's": "that is",
"there'd": "there had",
"there's": "there is",
"they'd": "they would",
"they'll": "they will",
"they're": "they are",
"they've": "they have",
"wasn't": "was not",
"we'd": "we would",
"we'll": "we will",
"we're": "we are",
"we've": "we have",
"weren't": "were not",
"what'll": "what will",
"what're": "what are",
"what's": "what is",
"what've": "what have",
"where'd": "where did",
"where's": "where is",
"who'll": "who will",
"who's": "who is",
"won't": "will not",
"wouldn't": "would not",
"you'd": "you would",
"you'll": "you will",
"you're": "you are"
}
def normalization(review,remove_stopwords=False):
    text = review.lower()
    clean_text = []
    for word in text.split():
        if word in contractions:
            clean_text.append(contractions[word])
        else:
            clean_text.append(word)
    text = " ".join(clean_text)
    
    # Format words and remove unwanted characters
#     text = re.sub(r'https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
    text = re.sub(r'https', ' ', text)  # note: this only strips the literal token "https", not whole URLs
    text = re.sub(r'\<a href', ' ', text)
    text = re.sub(r'&amp;', '', text) 
    text = re.sub(r'[_"\-;%()|+&=*%.,!?:#$@\[\]/]', ' ', text)
    text = re.sub(r'<br', ' ', text)
    text = re.sub(r'/>', ' ', text)
    text = re.sub(r'>', ' ', text)
    text = re.sub(r'<', ' ', text)
    text = re.sub(r'`', ' ', text)
    text = re.sub(r'\'', ' ', text)
    
    if remove_stopwords:
        text = text.split()
        stops = set(stopwords.words("english"))
        text = [w for w in text if not w in stops]
        text = " ".join(text)

    return text

In [5]:
normalization(reviews.Text[713])


Out[5]:
'   http   www amazon com gp product b000gwlugu  plocky s tortilla chips  red beans  n rice  7 ounce bag  pack of 12   a  i first tasted these chips while visiting relatives in ky  they are not available where i live  so i ordered them from amazon  wow  my friends and family are all addicted to them  the spicy flavor grabs you at the first bite  once a bag is open  it is gone '
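Note that the active re.sub(r'https', ...) line only removes the literal token "https", which is why the Amazon URL above survives as "http   www amazon com gp product ...". A more thorough pass, along the lines of the commented-out pattern in the cell above, could look like this (a hedged sketch; it is not what produced Out[5]):

import re

def strip_urls(text):
    # Remove whole http/https URLs before the punctuation pass
    return re.sub(r'https?://\S+', ' ', text)

strip_urls("see http://www.amazon.com/gp/product/b000gwlugu for details")
# -> 'see   for details'  (the URL is replaced by a single space)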

In [6]:
def clean_reviews(texts):
    return [normalization(text) for text in texts]

In [7]:
summary = clean_reviews(reviews.Summary)
text = clean_reviews(reviews.Text)

In [8]:
print("None count in Summary ",sum(x is None for x in summary))
print("None count in Text ",sum(x is None for x in text))
print(len(summary),len(text))


None count in Summary  0
None count in Text  0
568412 568412

In [9]:
# Count the words in the texts and summaries and drop words that occur fewer than threshold times
def get_word_count(texts,summaries,threshold=20):
    '''
    Params: texts, summaries, threshold=20
    Return : word count dict
    '''
    tokens = []
    for text in texts:
        tokens.extend(text.split())
    for summary in summaries:
        tokens.extend(summary.split())
    counts = Counter(tokens)
    reduced_count = {word:i for word,i in counts.items() if i >= threshold}
    return reduced_count
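A toy call illustrating the thresholding (made-up strings, not the review data):

get_word_count(["good dog food good dog"], ["good dog"], threshold=2)
# -> {'good': 3, 'dog': 3}   ('food' appears only once and is dropped)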

In [10]:
count = get_word_count(text,summary)

In [11]:
count


Out[11]:
{'gunk': 136,
 'amped': 36,
 'skipjack': 59,
 'freaking': 272,
 'coney': 24,
 'discoloration': 34,
 'muncher': 34,
 'evaluating': 34,
 'japanese': 1934,
 'gunpowder': 527,
 'occur': 257,
 'mysteriously': 49,
 'soaks': 124,
 'newbie': 115,
 'tjs': 24,
 'nascar': 29,
 'drought': 26,
 'acne': 495,
 'embarrassingly': 30,
 'again': 55642,
 'billed': 125,
 'dietician': 33,
 'carroll': 23,
 'unpleasant': 2248,
 'hosting': 58,
 'colorful': 530,
 'pounder': 105,
 'idea': 9359,
 'b002ievjry': 42,
 'rate': 3724,
 'pinwheels': 25,
 'required': 1546,
 'athletic': 83,
 'sams': 397,
 'uhm': 34,
 'alpha': 166,
 'bitterly': 22,
 '7yr': 21,
 'zola': 95,
 'lanka': 177,
 'plumper': 37,
 'woodsy': 113,
 'chee': 26,
 'marsala': 36,
 'begins': 383,
 'waring': 62,
 'bubble': 1635,
 'solixir': 26,
 'usd': 89,
 'tuxedo': 48,
 'hyperactivity': 26,
 'alternate': 602,
 'dye': 616,
 'gladly': 414,
 'wax': 549,
 'meowing': 147,
 'internet': 2767,
 'capful': 21,
 'definately': 1770,
 'respective': 48,
 'inexpensive': 1892,
 'omit': 83,
 'afternoon': 4649,
 '214': 21,
 'endurance': 234,
 'hell': 840,
 'devoid': 125,
 'filtering': 89,
 'palettes': 30,
 'knox': 91,
 'medicinal': 778,
 'indications': 37,
 'induction': 37,
 'flavonoids': 116,
 'folded': 179,
 'realm': 104,
 'bypassed': 23,
 'liquors': 51,
 'tagline': 21,
 'jerkey': 442,
 'voltage': 22,
 'boodum': 28,
 'accented': 37,
 'confessions': 23,
 'beloved': 662,
 'perrier': 92,
 'gogi': 22,
 'kitchens': 344,
 'ow': 21,
 'angle': 186,
 'catalog': 107,
 'movement': 262,
 'tasty': 37146,
 'edited': 248,
 'licorice': 7124,
 'pharma': 29,
 'raging': 47,
 'courser': 23,
 'hmm': 310,
 'cheesey': 66,
 'drum': 77,
 'protecting': 147,
 'clip': 313,
 'splinter': 146,
 'nondescript': 53,
 'richer': 1153,
 'drippings': 67,
 'notified': 247,
 'damp': 297,
 'interview': 76,
 'heinous': 20,
 'dane': 255,
 'ethical': 211,
 'travels': 292,
 'liver': 4429,
 'amout': 34,
 'characterize': 32,
 'prove': 470,
 'youth': 406,
 'understandably': 49,
 'samu': 28,
 'carbquik': 32,
 'deetz': 42,
 'embossed': 34,
 'cubic': 23,
 'betcha': 63,
 'shunt': 25,
 'affectionately': 21,
 'chin': 117,
 'aaa': 58,
 'itched': 31,
 'mcvitie': 57,
 'tumbler': 72,
 'original': 10199,
 'autolyzed': 84,
 'displayed': 215,
 'izzy': 77,
 'zp': 114,
 'johnny': 84,
 'kibbles': 656,
 'clogged': 181,
 '1990': 72,
 'emanating': 26,
 'stinking': 67,
 'zoe': 368,
 'leisure': 43,
 'finish': 5689,
 'bon': 273,
 'frankie': 31,
 'contracting': 29,
 'yang': 30,
 'hudson': 26,
 'distinguished': 41,
 'insult': 154,
 'laugh': 423,
 'founded': 140,
 'smoke': 1563,
 '~1': 37,
 'rapidly': 279,
 'lifespans': 129,
 'charms': 212,
 'aggressive': 516,
 'padding': 235,
 'glands': 94,
 'confit': 25,
 '050': 21,
 'monitor': 310,
 'dabur': 24,
 'terrifically': 22,
 'virtually': 928,
 'soledad': 44,
 'pixy': 29,
 'serums': 29,
 'mn': 101,
 'evanger': 34,
 'promo': 117,
 'carmine': 38,
 'refresh': 118,
 'unaware': 164,
 'fiasco': 31,
 'subtler': 34,
 'sunsweet': 31,
 'complications': 78,
 'effectively': 292,
 'handing': 162,
 'disorders': 264,
 'entrails': 30,
 'ace': 82,
 'collace': 20,
 'unused': 347,
 'forking': 30,
 'rebate': 33,
 'limes': 213,
 'sincere': 41,
 'haired': 155,
 'os': 158,
 'grocery': 21280,
 'ladybug': 40,
 'feared': 109,
 'choir': 27,
 'decafe': 132,
 'sensation': 759,
 'shun': 36,
 'sprint': 34,
 'tenderness': 99,
 'cuts': 928,
 'odor': 2757,
 'disorder': 309,
 'b001chfudc': 38,
 'mmm': 678,
 'himilayan': 24,
 'started': 15617,
 'mg': 2658,
 'intestinal': 498,
 'jimmies': 285,
 'choice': 11226,
 'dissapointment': 110,
 'richly': 98,
 'oxidant': 162,
 'tots': 231,
 'ward': 127,
 'oyster': 230,
 'tucker': 27,
 'ski': 52,
 'paradise': 131,
 'rig': 26,
 'od': 102,
 'wafting': 50,
 'usa': 3585,
 'crunch': 7868,
 'surely': 755,
 'tahitian': 111,
 'mandated': 22,
 'fisherman': 36,
 'shows': 1904,
 'pads': 183,
 'observations': 98,
 'drastic': 125,
 'thirsty': 484,
 'twitter': 32,
 'peices': 113,
 'conducted': 103,
 'needs': 8610,
 'headache': 683,
 'helpers': 33,
 'promoting': 187,
 'squish': 80,
 'nut': 6813,
 'heating': 760,
 'font': 58,
 'draws': 85,
 'indonesian': 102,
 'vegies': 128,
 'convinient': 33,
 'operate': 159,
 'product': 187840,
 'ahi': 66,
 'salad': 6240,
 'dimensional': 96,
 'kikkoman': 168,
 'anxiously': 126,
 'epoxy': 23,
 'considerable': 312,
 'surviving': 51,
 'heathy': 81,
 'treatment': 794,
 'via': 3776,
 'ethically': 160,
 'bent': 270,
 'park': 656,
 'smear': 170,
 'kippered': 34,
 'blogger': 40,
 'absolutly': 146,
 'ms': 605,
 'zotz': 64,
 'dressing': 4026,
 'proplan': 52,
 'cracked': 1055,
 'supersaver': 106,
 'airedale': 58,
 'differance': 33,
 'fringe': 23,
 'gardening': 122,
 'issuing': 23,
 'h20': 52,
 'aerogrow': 99,
 'busy': 3064,
 '80mg': 60,
 'party': 3839,
 'presenting': 46,
 'charger': 133,
 'remaining': 1495,
 'shrinkage': 20,
 'mercken': 54,
 'shipments': 944,
 'appassionato': 25,
 'territorial': 28,
 'ltd': 28,
 'defunct': 21,
 'concert': 33,
 'needle': 341,
 'patients': 286,
 'centers': 191,
 'unpalatable': 203,
 'plain': 10171,
 'component': 421,
 'winds': 76,
 'nine': 908,
 'baileys': 46,
 'robbed': 47,
 'speculation': 21,
 'limbs': 27,
 'pees': 24,
 'patties': 447,
 'dreamfield': 52,
 'talk': 1168,
 'aranciata': 23,
 'targeted': 72,
 'spotted': 230,
 'digestives': 65,
 'nm': 36,
 'pancakes': 3748,
 'everyday': 4375,
 'cliche': 27,
 'personally': 4546,
 'reformulate': 33,
 'mommy': 413,
 'faq': 46,
 'advertisment': 21,
 'wool': 72,
 'scarcely': 21,
 'unedible': 77,
 'grandchild': 35,
 'anyway': 7202,
 'eighties': 21,
 'gail': 36,
 'subjects': 40,
 'wonderful': 29897,
 'plentiful': 212,
 'stinks': 495,
 'thin': 6135,
 'marathons': 55,
 'lag': 23,
 'property': 142,
 'momma': 97,
 'pulls': 180,
 'dept': 92,
 'mikels': 28,
 'immediatly': 30,
 'persists': 27,
 'smore': 23,
 'mislabeled': 100,
 'panni': 31,
 'minerals': 2055,
 'mixtures': 146,
 'finum': 23,
 '9g': 254,
 'furthermore': 255,
 'generated': 82,
 'novel': 278,
 'avoiding': 494,
 'substitutes': 758,
 'teeth': 11363,
 'resides': 27,
 'bullmastiff': 39,
 'resembles': 354,
 'sensible': 233,
 'docked': 36,
 'dudes': 36,
 'staples': 305,
 'margarita': 873,
 'appreciating': 37,
 'dagoba': 75,
 'rbst': 24,
 'cult': 68,
 'purchasers': 42,
 'abbey': 63,
 'fantasic': 25,
 'distance': 385,
 'contented': 46,
 'ungodly': 26,
 'hiatus': 20,
 'donating': 165,
 'hips': 1361,
 'homeade': 43,
 'ecc': 38,
 'firm': 1979,
 'rot': 124,
 'whitish': 68,
 'distributed': 426,
 'met': 1158,
 'cookin': 28,
 'marbled': 26,
 'herdez': 38,
 'clipping': 42,
 'olives': 2249,
 'hyperthyroidism': 27,
 'material': 1185,
 'superb': 1762,
 'noooo': 34,
 'soba': 198,
 'adjustment': 184,
 'almondina': 29,
 'bisuifite': 20,
 'slapping': 33,
 'trip': 3318,
 'starters': 231,
 'stranger': 115,
 'stem': 234,
 'casino': 33,
 'discuss': 111,
 'swelling': 116,
 'smacks': 80,
 'ingesting': 294,
 'rite': 162,
 'screwing': 51,
 'determine': 496,
 'troubling': 41,
 'brother': 1479,
 'claiming': 316,
 'unusually': 183,
 'acquisition': 27,
 'fillers': 2123,
 'squeezy': 24,
 'fakelime': 28,
 'mushed': 48,
 'two': 50257,
 'housed': 26,
 'revert': 59,
 'throw': 5985,
 'groomer': 124,
 'disastrous': 25,
 'comprises': 44,
 'capresso': 112,
 'restless': 141,
 'disposed': 101,
 'angled': 22,
 'europeans': 40,
 'canisters': 718,
 'crying': 550,
 'positive': 3093,
 'bbq': 4736,
 'child': 3770,
 'prevented': 111,
 'tuscan': 179,
 'appetit': 116,
 'locust': 25,
 'bride': 62,
 'jawbreaker': 26,
 'warts': 28,
 'suspicions': 31,
 'bengal': 214,
 'hydroponics': 21,
 'injuries': 28,
 'hygienist': 27,
 'avaliable': 22,
 'vacuuming': 21,
 'reddenbacher': 35,
 'isles': 43,
 'searching': 2673,
 'mesh': 434,
 'canadian': 506,
 'accompany': 153,
 'poppin': 34,
 'albertsons': 84,
 'sourcing': 117,
 'physically': 260,
 'scrumptious': 650,
 'm': 8719,
 'imparts': 180,
 'bicycle': 87,
 'quality': 41132,
 'vinaigrettes': 26,
 'petits': 30,
 'crafty': 32,
 'gp': 16031,
 'tuned': 64,
 'undaunted': 20,
 'modifying': 23,
 'pastilles': 40,
 'depression': 219,
 'linguini': 44,
 'quit': 1340,
 'disclosing': 45,
 'makhani': 30,
 'sneaks': 93,
 'calendar': 36,
 'unrecognizable': 77,
 'assuredly': 26,
 'ore': 24,
 'critic': 92,
 'trailer': 54,
 'snobby': 42,
 'toniq': 21,
 'creeps': 35,
 'cleveland': 44,
 'measurable': 24,
 'triscuits': 87,
 'dense': 1763,
 'performing': 55,
 'recycles': 21,
 'boasts': 169,
 'batman': 140,
 '27th': 28,
 'lone': 62,
 'winston': 20,
 'fighting': 399,
 'supplying': 84,
 'defect': 139,
 'whey': 1482,
 'importing': 65,
 'allowing': 589,
 'toddy': 95,
 'heathful': 32,
 'doa': 23,
 'bathed': 57,
 'subway': 97,
 'debated': 79,
 'c': 5436,
 'frequent': 905,
 'refund': 2362,
 'heighten': 25,
 'medications': 773,
 'reminiscing': 31,
 'dill': 549,
 'using': 31746,
 'adding': 8189,
 'nile': 34,
 'mountian': 40,
 'oolong': 1410,
 'discourage': 115,
 'tarrazu': 87,
 'sounding': 149,
 'stinkers': 29,
 'convience': 110,
 'cartons': 513,
 'cuban': 132,
 'dementia': 43,
 'crowd': 515,
 'itches': 30,
 'ave': 40,
 'educate': 93,
 'bentley': 173,
 'stirs': 49,
 'pasteurized': 183,
 'succumbed': 20,
 'sticky': 3399,
 'oatmeal': 10806,
 'god': 1826,
 'mudslide': 167,
 'snackmasters': 112,
 'pricing': 1997,
 'guarding': 26,
 'jonesing': 50,
 'packet': 7165,
 'bugged': 25,
 'flat': 2686,
 'notice': 5389,
 'honees': 97,
 'hara': 75,
 'shopped': 333,
 '1519': 44,
 'inspecting': 50,
 'cosmetics': 73,
 'warmer': 303,
 '280': 86,
 'toned': 108,
 'establishment': 38,
 'kickstarter': 20,
 'suzie': 26,
 'stronger': 5655,
 'mmmmmm': 394,
 'sufferer': 63,
 '20gum': 25,
 'noon': 231,
 'greg': 35,
 'monopoly': 50,
 'filing': 42,
 'improves': 386,
 'raspberries': 614,
 'employed': 56,
 'roller': 655,
 'rolling': 591,
 'adores': 389,
 'hon': 34,
 'averaging': 47,
 'downgraded': 48,
 'fizzies': 89,
 'footed': 49,
 'migrains': 21,
 'samosas': 26,
 'doctoring': 121,
 'energizer': 66,
 'sooooo': 723,
 'haley': 47,
 'signing': 178,
 'jasmine': 2572,
 'puff': 494,
 'emphasized': 27,
 'duty': 402,
 'related': 902,
 'greyish': 28,
 'glee': 91,
 'scorch': 37,
 'gizzards': 22,
 'wards': 24,
 'imitation': 438,
 'chachere': 45,
 'theo': 97,
 'frankly': 1027,
 'potful': 36,
 'concerning': 298,
 'metabolism': 680,
 'studies': 784,
 'choked': 355,
 'serious': 2325,
 'yankee': 62,
 'dietitian': 47,
 'arthritis': 861,
 'crips': 26,
 'ea': 166,
 'murky': 122,
 'sewing': 22,
 'marinara': 352,
 'tantalizing': 109,
 'goldendoodle': 103,
 'reactive': 47,
 'clothes': 442,
 'uncomfortably': 34,
 'salesperson': 20,
 'pleasure': 1647,
 'tempura': 36,
 'leukemia': 35,
 'finals': 57,
 'fillets': 223,
 '100': 11983,
 'degradable': 28,
 'commonly': 443,
 'hund': 37,
 'le': 237,
 'dramatically': 339,
 'lesson': 578,
 'pilling': 75,
 'cheerfully': 24,
 'tip': 1456,
 'fairly': 5001,
 'owe': 244,
 'palmer': 96,
 'persons': 163,
 'devouring': 176,
 'cronic': 28,
 'clubs': 116,
 '9s': 29,
 'disappoint': 1096,
 'chore': 280,
 'students': 410,
 'unit': 1306,
 'unchewable': 41,
 'splurge': 435,
 'warming': 439,
 'mines': 37,
 'pic': 145,
 'swollen': 122,
 'lists': 801,
 'cats': 31772,
 'flaver': 117,
 'rosehip': 80,
 'tense': 59,
 'locks': 118,
 'dedicated': 369,
 'multi': 1445,
 'harden': 209,
 'zinfandel': 33,
 'pouts': 23,
 'handicapped': 23,
 'does': 76722,
 'nova': 38,
 'scary': 470,
 'constipate': 32,
 'olestra': 21,
 'ridiculously': 740,
 'spoil': 501,
 'sean': 21,
 'motel': 109,
 'ingedients': 32,
 '53': 185,
 'binds': 20,
 'crevices': 27,
 'disintegrates': 33,
 'numbers': 595,
 'twists': 546,
 'whereas': 814,
 'elbow': 145,
 'steering': 38,
 'policy': 584,
 'knee': 247,
 'traced': 21,
 'sanity': 115,
 'reputation': 493,
 'arrowhead': 289,
 'blob': 150,
 'scotland': 200,
 'beats': 1768,
 'barking': 137,
 'naysayers': 32,
 'caking': 79,
 'belle': 39,
 'authorities': 22,
 'vitaspelt': 20,
 'gawd': 27,
 'sanka': 95,
 'drove': 164,
 'crispbreads': 73,
 'silk': 711,
 'layers': 495,
 'apso': 94,
 'nothin': 60,
 'basset': 115,
 'casein': 322,
 'gullet': 27,
 'doggies': 438,
 'goodies': 759,
 'fluffy': 1306,
 'sideways': 77,
 'sally': 47,
 'extracts': 457,
 'predicted': 39,
 'exported': 27,
 'alfalfa': 345,
 'spaghetti': 2011,
 'wilted': 61,
 'acts': 582,
 'fineness': 20,
 'aztec': 45,
 'fountains': 33,
 'classmates': 49,
 'sooth': 149,
 'valrhona': 90,
 'cookout': 43,
 'together': 6848,
 'prepping': 38,
 'snapea': 112,
 'yakisoba': 116,
 'undrinkable': 524,
 'kitties': 1806,
 'otis': 54,
 'paranoid': 90,
 'commentary': 53,
 'became': 3604,
 'processing': 1102,
 'detest': 79,
 'regretfully': 27,
 'episodes': 191,
 'magazines': 47,
 'rigid': 80,
 'broccoli': 1650,
 'emulsifier': 142,
 'thanks': 10925,
 'captivating': 52,
 'maltose': 32,
 'stopper': 65,
 'seattles': 23,
 'shoved': 75,
 'preventive': 23,
 'achy': 24,
 'unwrapped': 141,
 'scarf': 260,
 'redmill': 24,
 'balances': 160,
 'lawry': 181,
 'these': 247317,
 'hg': 20,
 'aggravating': 42,
 'multitude': 120,
 'tamped': 32,
 'plunge': 228,
 'orijen': 1101,
 'overwhelm': 416,
 'unmarked': 77,
 'saut': 138,
 'raman': 42,
 'guarna': 22,
 'nylabone': 690,
 'tortured': 30,
 'chix': 54,
 'aztecs': 21,
 'mealtime': 151,
 'diseased': 76,
 'group': 1090,
 'closed': 1076,
 'printer': 20,
 'oxygen': 185,
 'yawning': 35,
 'bertie': 100,
 'beverage': 4383,
 'reputed': 39,
 '065': 22,
 'frise': 131,
 'surveys': 32,
 'softness': 208,
 'anthony': 31,
 'timothy': 2300,
 'fairway': 29,
 'd3': 214,
 'metamucel': 25,
 'magical': 368,
 'waste': 8123,
 'dietary': 2052,
 'forever': 1996,
 '49': 781,
 'barleans': 23,
 'gastronomic': 22,
 'buttery': 1887,
 'knuckles': 36,
 'crf': 96,
 'dispensers': 173,
 'honeydew': 76,
 'colored': 1535,
 'oranges': 428,
 'al': 819,
 'yerba': 400,
 'closes': 107,
 'plasticy': 48,
 'boring': 1359,
 'realemon': 29,
 'subsitute': 127,
 'runnier': 84,
 'jif': 249,
 'kneaded': 28,
 'pbj': 86,
 'shard': 20,
 'b': 4844,
 'smelliest': 21,
 'rhymes': 24,
 'sockeye': 85,
 'cycle': 871,
 'carl': 30,
 'edible': 2268,
 'admission': 26,
 'potatoe': 260,
 'ga': 158,
 'distracting': 74,
 'rediscovered': 41,
 'displeasing': 25,
 'stirfry': 30,
 'higly': 31,
 'conditioners': 321,
 'ravioli': 275,
 'spouse': 264,
 'flight': 323,
 'lest': 107,
 'garibaldi': 25,
 'location': 494,
 'azalea': 24,
 'gentlease': 138,
 'fails': 393,
 'vis': 33,
 'runts': 98,
 'shabby': 116,
 'undigested': 60,
 'barr': 24,
 'flatter': 96,
 'concentrating': 25,
 'evian': 52,
 'goth': 41,
 'american': 3732,
 'monohydrochloride': 20,
 'relax': 849,
 'paragraph': 61,
 'rudy': 70,
 'youll': 41,
 'stringent': 53,
 'xlear': 72,
 'stirred': 496,
 'misshapen': 61,
 'enticing': 290,
 'appreciable': 35,
 'acidity': 1075,
 'midday': 139,
 'dirt': 1004,
 'ppl': 42,
 'inactive': 44,
 'diverticulitis': 24,
 'coaxed': 32,
 'bleah': 25,
 'spooning': 48,
 'passing': 478,
 'heaviest': 32,
 'opportunities': 75,
 'uniform': 340,
 'brightens': 31,
 'warrior': 90,
 'jerkies': 158,
 'feat': 76,
 'olio': 64,
 'comparison': 2951,
 'deficiencies': 48,
 'moos': 41,
 'hots': 111,
 'noticeably': 541,
 'shake': 4538,
 'doable': 63,
 'neater': 52,
 'grapefruit': 1097,
 'gatorade': 862,
 'teething': 582,
 'hanger': 23,
 'research': 5407,
 'revolving': 21,
 'dm': 26,
 'acacia': 76,
 'mile': 666,
 'tyson': 29,
 'mannheim': 21,
 'premier': 217,
 'riley': 42,
 'heartbroken': 50,
 'closure': 148,
 'rushes': 28,
 'bottoms': 174,
 'cbtl': 122,
 'exaggeration': 138,
 'clarifying': 32,
 'prolonged': 109,
 'shelved': 20,
 'kindness': 69,
 'meaningful': 34,
 'sprung': 64,
 'herpes': 42,
 'sincerely': 306,
 '10am': 24,
 'bumps': 183,
 'freshner': 24,
 'stalk': 89,
 '74': 156,
 'louisiana': 438,
 'purity': 177,
 'rootbeer': 202,
 'vegetarians': 437,
 'flavour': 2518,
 'essiac': 26,
 'distribution': 328,
 'seinfeld': 23,
 'shorts': 49,
 'heretofore': 20,
 'soggy': 1312,
 'ragged': 38,
 'dissappointing': 51,
 'grasses': 87,
 'gushing': 21,
 'dentist': 379,
 'phew': 39,
 'yums': 36,
 'swish': 68,
 'balm': 376,
 'bissinger': 34,
 'greedily': 44,
 'fertilizer': 150,
 'gray': 565,
 'lacking': 1544,
 'charlee': 107,
 'joint': 725,
 'diarreha': 27,
 'stong': 141,
 'chocolate': 67509,
 'graphics': 58,
 'retain': 409,
 'pinches': 62,
 'hermetically': 37,
 'connections': 27,
 'lugging': 149,
 'dts': 26,
 'ricotta': 74,
 'definitive': 88,
 'apprehensive': 193,
 'trappist': 39,
 'ac': 52,
 'anorexic': 28,
 'ballerina': 21,
 'mmmmh': 25,
 'injection': 40,
 'tack': 44,
 'paycheck': 46,
 'evaporates': 34,
 'dum': 55,
 'th': 149,
 'undeliverable': 23,
 'deliciously': 634,
 'cacoa': 27,
 'swings': 85,
 'yoohoo': 60,
 'crumble': 1063,
 'appreciate': 3389,
 'gardeners': 20,
 'powerfully': 39,
 'resturant': 78,
 'court': 84,
 'gusto': 845,
 'meant': 1969,
 ...}

In [12]:
def get_vocab(word_counts):
    '''
    Param: word_counts
    Return: Vocab,vocab_to_int,int_to_vocab
    '''
    vocab = set(word_counts.keys())
    
    vocab_to_int = {}
    int_to_vocab = {}
    
    codes = ["<UNK>","<PAD>","<EOS>","<GO>"]
    for i,code in enumerate(codes):
        vocab_to_int[code] = i

    for i,word in enumerate(vocab,4):
        vocab_to_int[word] = i
        
    int_to_vocab = {i:word for word,i in vocab_to_int.items()}
    return vocab,vocab_to_int,int_to_vocab

In [13]:
vocab,vocab_to_int,int_to_vocab = get_vocab(count)

In [14]:
print(len(vocab),len(vocab_to_int),len(int_to_vocab))


22726 22730 22730
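The difference of four between len(vocab) and the two mappings comes from the special codes added in get_vocab; a quick check (assuming the cells above have been run):

for code in ["<UNK>", "<PAD>", "<EOS>", "<GO>"]:
    print(code, vocab_to_int[code])
# <UNK> 0
# <PAD> 1
# <EOS> 2
# <GO> 3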

In [15]:
# Using pre-trained ConceptNet Numberbatch embeddings (https://github.com/commonsense/conceptnet-numberbatch)
def get_word_embeddings():
    embeddings = {}
    with open('./Datasets/embeddings/numberbatch-en-17.06.txt',encoding='utf-8') as em:
        for embed in em:
            em_line = embed.split(' ')
            if len(em_line) > 2: # skip the first line, which only holds the word count and dimension count
                word = em_line[0]
                embedding = np.array(em_line[1:], dtype=np.float32)
                embeddings[word] = embedding
    print('Word embeddings:', len(embeddings))
    return embeddings

In [16]:
CN_embeddings = get_word_embeddings()


Word embeddings: 417194

In [17]:
not_in_embeddings = [word for word in vocab if word not in CN_embeddings]

In [18]:
print("No. of words not in Ebeddings : ",len(not_in_embeddings))


No. of words not in Embeddings :  2759

In [19]:
def create_embedding_matrix(int_to_vocab,embeddings,embedding_dim = 300):
    '''
    Params : int_to_vocab, embeddings, embedding_dim
    Return : embedding matrix
    '''
    # Generating empty numpy matrix
    embeding_matrix = np.zeros([len(vocab_to_int),embedding_dim])
    embeding_matrix = embeding_matrix.astype(np.float32)
    
    # Use the ConceptNet vector when available; otherwise fall back to a random vector
    for i,word in int_to_vocab.items():
        if word in embeddings:
            embeding_matrix[i] = embeddings[word]
        else:
            embeding_matrix[i] = np.random.normal(size=embedding_dim)
    return embeding_matrix

In [20]:
embeding_matrix = create_embedding_matrix(int_to_vocab,CN_embeddings)

In [21]:
print(len(embeding_matrix),len(vocab_to_int))


22730 22730
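A quick sanity check on the matrix (a sketch, assuming the cells above have been run): rows for in-vocabulary words hold their Numberbatch vectors, while the roughly 2759 out-of-vocabulary words and the four special tokens get random vectors.

print(embeding_matrix.shape)                     # expected (22730, 300)
sample_word = next(iter(vocab & CN_embeddings.keys()))
print(sample_word, embeding_matrix[vocab_to_int[sample_word]][:5])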

In [22]:
def encode_source_target(sources, targets, vocab_to_int):
    '''
    Params : Sources, Targets, vocab_to_int
    Return :encoded_sources, encoded_targets
    '''
    encoded_sources = []
    encoded_targets = []
    for source in sources:
        encod_ent = []
        for word in source.split():
            if word in vocab_to_int:
                encod_ent.append(vocab_to_int[word])
            else:
                encod_ent.append(vocab_to_int["<UNK>"])
        encoded_sources.append(encod_ent)
    
    for target in targets:
        encod_ent = []
        for word in target.split():
            if word in vocab_to_int:
                encod_ent.append(vocab_to_int[word])
            else:
                encod_ent.append(vocab_to_int["<UNK>"])
        encoded_targets.append(encod_ent)
        
    return encoded_sources, encoded_targets
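As a toy illustration of the fallback to <UNK> (a hypothetical vocabulary, not the one built above):

toy_vocab = {"<UNK>": 0, "<PAD>": 1, "<EOS>": 2, "<GO>": 3, "great": 4, "taffy": 5}
src, tgt = encode_source_target(["great taffy zzzunseen"], ["great"], toy_vocab)
# src -> [[4, 5, 0]]   ("zzzunseen" is not in the vocabulary, so it maps to <UNK>)
# tgt -> [[4]]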

In [23]:
encoded_sources, encoded_targets = encode_source_target(text,summary,vocab_to_int)

In [24]:
print(len(encoded_sources),len(text))


568412 568412

Model


In [25]:
# Building Input Placeholders
def model_inputs():
    '''
    Returns : input_,target,learning_rate,keep_prob,source_seq_length,target_seq_length,max_target_seq_length
    '''
    input_ = tf.placeholder(dtype=tf.int32,shape=(None,None),name="inputs")
    target = tf.placeholder(dtype=tf.int32,shape=(None,None),name="target")
    
    learning_rate = tf.placeholder(dtype=tf.float32,name="learning_rate")
    keep_prob = tf.placeholder(dtype=tf.float32,name="keep_prob")
    
    source_seq_length = tf.placeholder(dtype=tf.int32,shape=(None,),name="source_seq_length")
    target_seq_length = tf.placeholder(dtype=tf.int32,shape=(None,),name="target_seq_length")
    
    max_target_seq_length = tf.reduce_max(target_seq_length,name="max_target_seq_length")
    return input_,target,learning_rate,keep_prob,source_seq_length,target_seq_length,max_target_seq_length

In [26]:
# Process the decoder input: drop the last token of each target row and prepend <GO>
def process_decoder_input(target_data,vocab_to_int,batch_size):
    
    strided_target = tf.strided_slice(target_data,(0,0),(batch_size,-1),(1,1))
    go = tf.fill(value=vocab_to_int["<GO>"],dims=(batch_size,1))
    decoder_input = tf.concat((go,strided_target),axis=1)
    return decoder_input
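Conceptually this drops the last token of every target row and prepends the <GO> id; the same transformation in plain NumPy, as a sketch (the cell above does it with TensorFlow ops inside the graph):

import numpy as np

targets = np.array([[10, 11, 2],      # each row ends with <EOS> (id 2)
                    [12, 13, 2]])
go_id = 3                             # vocab_to_int["<GO>"]
trimmed = targets[:, :-1]             # drop the last column, like tf.strided_slice
decoder_input = np.concatenate([np.full((2, 1), go_id), trimmed], axis=1)
# decoder_input -> [[ 3, 10, 11],
#                   [ 3, 12, 13]]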

In [124]:
def encoding_layer(embeded_rnn_input,rnn_size,keep_prob,num_layers,batch_size,source_sequence_length):

    def get_lstm(rnn_size,keep_prob=0.7):
        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm,input_keep_prob=keep_prob)
        return drop
    # Forward LSTM layer
    cell_fw = tf.contrib.rnn.MultiRNNCell([get_lstm(rnn_size,keep_prob) for _ in range(num_layers)])

    # Backward LSTM layer
    cell_bw = tf.contrib.rnn.MultiRNNCell([get_lstm(rnn_size,keep_prob) for _ in range(num_layers)])
    
    ((encoder_fw_outputs,
              encoder_bw_outputs),
             (encoder_fw_state,
              encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,cell_bw=cell_bw,inputs=embeded_rnn_input,
                                    sequence_length=source_sequence_length,dtype=tf.float32)
                                                                     
    encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)
    
    encoder_states = []
    
    for i in range(num_layers):
        if isinstance(encoder_fw_state[i],tf.contrib.rnn.LSTMStateTuple):
            encoder_state_c = tf.concat(values=(encoder_fw_state[i].c,encoder_bw_state[i].c),axis=1,name="encoder_fw_state_c")
            encoder_state_h = tf.concat(values=(encoder_fw_state[i].h,encoder_bw_state[i].h),axis=1,name="encoder_fw_state_h")
            encoder_state = tf.contrib.rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)
        elif isinstance(encoder_fw_state[i], tf.Tensor):
            encoder_state = tf.concat(values=(encoder_fw_state[i], encoder_bw_state[i]), axis=1, name='bidirectional_concat')
        
        encoder_states.append(encoder_state)
    
    encoder_states = tuple(encoder_states)
    
    return encoder_outputs,encoder_states
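Because the forward and backward outputs and states are concatenated, the tensors coming out of this layer are twice as wide as a single cell; a shape sketch under the hyperparameters used further below (assumed rnn_size = 256):

# encoder_outputs              : [batch_size, max_source_len, 512]
# encoder_states[i].c / .h     : [batch_size, 512] for each layer i
# This is why the decoder cell in decoding_layer is built with 2 * rnn_size units.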

In [157]:
def training_decoder(dec_embed_input,decoder_cell,encoder_state, output_layer,
                     target_sequence_length,max_target_length):
    
    
    helper = tf.contrib.seq2seq.TrainingHelper(dec_embed_input,target_sequence_length)
    
    decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell,helper,initial_state=encoder_state,
                                              output_layer=output_layer)
    
    
    (final_outputs, final_state, final_sequence_lengths) = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,impute_finished=True,
                                                     maximum_iterations=max_target_length)
    
    return final_outputs

In [152]:
def inference_decoder(embeddings,decoder_cell,encoder_state,output_layer,vocab_to_int,
                      max_target_length,batch_size):
    
    start_tokens = tf.tile(tf.constant(dtype=tf.int32,value=[vocab_to_int["<GO>"]]),
                           multiples=[batch_size],name="start_tokens")
    
    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embeddings,
                                                      start_tokens=start_tokens,
                                                      end_token=vocab_to_int["<EOS>"])
    
    decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell,helper,initial_state=encoder_state,
                                              output_layer=output_layer)
    
    (final_outputs, final_state, final_sequence_lengths) = tf.contrib.seq2seq.dynamic_decode(decoder,impute_finished=True,
                                                  maximum_iterations=max_target_length)
    return final_outputs

In [156]:
def decoding_layer(target_inputs,encoder_state,embedding,vocab_to_int,rnn_size,target_sequence_length,max_target_length,
                   batch_size,num_layers):
    
    def get_lstm(rnn_size,keep_prob=0.7):
        # Double the cell size so the decoder state matches the concatenated
        # forward/backward encoder state (2 * rnn_size)
        rnn_size = 2 * rnn_size
        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm,input_keep_prob=keep_prob)
        return drop
    vocab_len = len(vocab_to_int)
    decoder_cell = tf.contrib.rnn.MultiRNNCell([get_lstm(rnn_size) for _ in range(num_layers)])
    output_layer = Dense(vocab_len,kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))
    
    
    embed = tf.nn.embedding_lookup(embedding,target_inputs)
    
    with tf.variable_scope("decoding"):
        
        training_logits = training_decoder(embed,decoder_cell,encoder_state,output_layer,
                                         target_sequence_length,max_target_length)
    
        
    with tf.variable_scope("decoding",reuse=True):
        
        inference_logits = inference_decoder(embedding,decoder_cell,encoder_state,output_layer,vocab_to_int,
                                          max_target_length,batch_size)
    
    return training_logits, inference_logits

In [154]:
def seq2seq_model(source_input,target_input,embeding_matrix,vocab_to_int,source_sequence_length,
                  target_sequence_length,max_target_length, rnn_size,keep_prob,num_layers,batch_size):
    '''
    Params : source_input,target_input,embeding_matrix,vocab_to_int,source_sequence_length,
                  target_sequence_length,max_target_length, rnn_size,keep_prob,num_layers,batch_size
    
    Return : training_logits, inference_logits
    '''
    embedings = embeding_matrix
    embed = tf.nn.embedding_lookup(embedings,source_input)
    
    encoder_output,encoder_states = encoding_layer(embed,rnn_size,keep_prob,num_layers,
                                                   batch_size,source_sequence_length)
    
    training_logits, inference_logits = decoding_layer(target_input,encoder_states,embedings,
                                                                vocab_to_int,rnn_size,target_sequence_length,
                                                                max_target_length,batch_size,num_layers)
    
    return training_logits, inference_logits

Batching


In [57]:
# Sorting the texts and summaries for better padding
# (sorted by the length of the encoded text)
def sort_text_summary(texts,summaries):
    text_length = [(i,text,len(text)) for i,text in enumerate(texts)]
    text_length.sort(key=operator.itemgetter(2))
    
    sorted_text = [text for i,text,length in text_length]
    sorted_summary = []
    for i,text,length in text_length:
        sorted_summary.append(summaries[i])
    return sorted_text,sorted_summary
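A toy check of the sorting (a sketch with made-up encoded ids):

srcs = [[5, 6, 7], [8], [9, 10]]
tgts = [[1], [2], [3]]
sort_text_summary(srcs, tgts)
# -> ([[8], [9, 10], [5, 6, 7]], [[2], [3], [1]])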

In [58]:
sorted_text, sorted_summary = sort_text_summary(encoded_sources,encoded_targets)

In [165]:
len(sorted_text)


Out[165]:
568412

In [60]:
# Pad every sentence in a batch to the batch's maximum length with <PAD>
def pad_sentence_batch(sentence_batch):
    max_length = max([len(sent) for sent in sentence_batch])
    padded_sentences = []
    for sent in sentence_batch:
        sent_len = len(sent)
        if len(sent) < max_length:
            padded_sentences.append(sent + [vocab_to_int["<PAD>"] for _ in range(max_length - sent_len)])
        else:
            padded_sentences.append(sent)
    return padded_sentences
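For example (assuming vocab_to_int["<PAD>"] == 1, as set up in get_vocab above):

pad_sentence_batch([[5, 6, 7], [8]])
# -> [[5, 6, 7], [8, 1, 1]]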

In [61]:
def get_batches(encoded_sources, encoded_targets, batch_size):
    
    '''
    Params : encoded_sources, encoded_targets, batch_size
    Return : text_batch,summary_batch,source_seq_len,target_seq_len
    '''
    
    sorted_text, sorted_summary = sort_text_summary(encoded_sources,encoded_targets)
    
    batch_count = len(sorted_text)//batch_size
    
    for i in range(batch_count):
        start = i * batch_size
        end = start + batch_size
        
        text_batch = np.array(pad_sentence_batch(sorted_text[start:end]))
        summary_batch = np.array(pad_sentence_batch(sorted_summary[start:end]))
        
        source_seq_len = [len(sent) for sent in text_batch]
        target_seq_len = [len(sent) for sent in summary_batch]
        
        yield (text_batch,summary_batch,source_seq_len,target_seq_len)
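Note that source_seq_len and target_seq_len are computed after padding, so every entry in a batch equals that batch's padded length. A small usage sketch (assuming the encoded data from the cells above):

text_batch, summary_batch, src_lens, tgt_lens = next(get_batches(encoded_sources, encoded_targets, 4))
print(text_batch.shape, summary_batch.shape)   # (4, max_text_len_in_batch), (4, max_summary_len_in_batch)
print(src_lens, tgt_lens)                      # every entry equals the padded batch length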

In [211]:
# Hyperparameters
epochs = 10
batch_size = 512
rnn_size = 256
num_layers = 2
learn_rate = 0.01
keep_probability = 0.75

#Model save path
save_path = 'models/model'

display_step = 5

In [203]:
# Build Graph

train_graph = tf.Graph()
with train_graph.as_default():
    
    # Load the model inputs   
    input_,target,learning_rate,keep_prob,source_seq_length,target_seq_length,max_target_seq_length = model_inputs()
    
    # Create the training and inference logits
    training_logits, inference_logits = seq2seq_model(input_,target,embeding_matrix,vocab_to_int,source_seq_length,target_seq_length,
                  max_target_seq_length,rnn_size,keep_probability,num_layers,batch_size)
    
    # Create tensors for the training logits and inference logits
    training_logits = tf.identity(training_logits.rnn_output, name='logits')
    inference_logits = tf.identity(inference_logits.sample_id, name='predictions')
    
    masks = tf.sequence_mask(target_seq_length, max_target_seq_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):
        cost = tf.contrib.seq2seq.sequence_loss(training_logits,target,masks)
        optimizer=tf.train.AdamOptimizer(learning_rate)
        
        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

Training


In [204]:
# Accuracy
def get_accuracy(target, logits):
    """
    Calculate accuracy
    """
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(
            target,
            [(0,0),(0,max_seq - target.shape[1])],
            'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(
            logits,
            [(0,0),(0,max_seq - logits.shape[1])],
            'constant')

    return np.mean(np.equal(target, logits))
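A toy check of the padding logic (the shorter of the two arrays is zero-padded before comparison):

import numpy as np

target = np.array([[4, 5, 2]])
logits = np.array([[4, 9]])          # shorter prediction, padded with zeros to length 3
print(get_accuracy(target, logits))  # 0.333... -- only the first token matches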

In [205]:
# Split the data into training and validation sets (1 batch for validation, the rest for training)
train_source = sorted_text[batch_size:]
train_target = sorted_summary[batch_size:]
valid_source = sorted_text[:batch_size]
valid_target = sorted_summary[:batch_size]

In [206]:
(valid_text_batch,valid_summary_batch,valid_source_seq_len,valid_target_seq_len) = next(get_batches(valid_source,valid_target,batch_size))

In [208]:
# Note: source_seq_len/target_seq_len here are leftovers from an earlier run (batch size was 64 at that point)
print(len(source_seq_len),len(target_seq_len))


64 64

In [210]:
with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())
    
    for epoch_i in range(epochs):
        for batch_i,(text_batch,summary_batch,source_seq_len,target_seq_len) in enumerate(
            get_batches(train_source,train_target,batch_size)):
            
           
            _, loss = sess.run([train_op,cost],
                              feed_dict={
                                  input_ : text_batch,
                                  target : summary_batch,
                                  learning_rate:learn_rate,
                                  keep_prob : keep_probability,
                                  source_seq_length : source_seq_len,
                                  target_seq_length : target_seq_len
                              })
            
            if batch_i % display_step == 0 and batch_i > 0:
                
                batch_train_logits = sess.run(inference_logits,
                                             feed_dict={
                                                 input_: text_batch,
                                                 source_seq_length: source_seq_len,
                                                 target_seq_length: target_seq_len,
                                                 keep_prob: 1.0
                                             })
                
                batch_valid_logits = sess.run(inference_logits,
                                             feed_dict={
                                                 input_: valid_text_batch,
                                                 source_seq_length: valid_source_seq_len,
                                                 target_seq_length: valid_target_seq_len,
                                                 keep_prob: 1.0
                                             })
                
                train_accuracy = get_accuracy(summary_batch,batch_train_logits)
                valid_accuracy = get_accuracy(valid_summary_batch,batch_valid_logits)
                
                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.4f}, Validation Accuracy: {:>6.4f}, Loss: {:>6.4f}'
                      .format(epoch_i, batch_i, len(sorted_text) // batch_size, train_accuracy, valid_accuracy, loss))
                
    # Save Model
    saver = tf.train.Saver()
    saver.save(sess, save_path)
    print('Model Trained and Saved')


Epoch   0 Batch    5/8881 - Train Accuracy: 0.0031, Validation Accuracy: 0.0000, Loss: 9.8540
Epoch   0 Batch   10/8881 - Train Accuracy: 0.0052, Validation Accuracy: 0.0035, Loss: 9.5644
Epoch   0 Batch   15/8881 - Train Accuracy: 0.0141, Validation Accuracy: 0.0043, Loss: 9.1686
Epoch   0 Batch   20/8881 - Train Accuracy: 0.0100, Validation Accuracy: 0.0043, Loss: 8.6641
Epoch   0 Batch   25/8881 - Train Accuracy: 0.0156, Validation Accuracy: 0.0043, Loss: 8.3862
Epoch   0 Batch   30/8881 - Train Accuracy: 0.0143, Validation Accuracy: 0.0043, Loss: 7.7328
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-210-554a9fc308e9> in <module>()
     14                                   keep_prob : keep_probability,
     15                                   source_seq_length : source_seq_len,
---> 16                                   target_seq_length : target_seq_len
     17                               })
     18 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    893     try:
    894       result = self._run(None, fetches, feed_dict, options_ptr,
--> 895                          run_metadata_ptr)
    896       if run_metadata:
    897         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1122     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1123       results = self._do_run(handle, final_targets, final_fetches,
-> 1124                              feed_dict_tensor, options, run_metadata)
   1125     else:
   1126       results = []

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1319     if handle is None:
   1320       return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1321                            options, run_metadata)
   1322     else:
   1323       return self._do_call(_prun_fn, self._session, handle, feeds, fetches)

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1325   def _do_call(self, fn, *args):
   1326     try:
-> 1327       return fn(*args)
   1328     except errors.OpError as e:
   1329       message = compat.as_text(e.message)

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1304           return tf_session.TF_Run(session, options,
   1305                                    feed_dict, fetch_list, target_list,
-> 1306                                    status, run_metadata)
   1307 
   1308     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [ ]: